package data_deepprocessing.algorithm.cvalue_ncvalue_tfidf.tf_idf;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import data_deepprocessing.algorithm.cvalue_ncvalue_tfidf.util.StopListYYH;

/**
 * Collects the raw counts used for a TF-IDF style computation over a list of
 * sentences: per-term occurrence counts, per-term containing-sentence counts,
 * frequencies of the remaining (non-term, non-stop) words, and their totals.
 *
 * <p>Counts accumulate in the maps across calls to {@link #execute(List, List)};
 * the maps are never cleared by this class.
 *
 * @author YUHU YUAN
 * @date created 2017-03-12 11:05:44
 * @version 1.0
 */

public class TfIdfService {

	/** Stop-word filter applied when counting ordinary words; may be {@code null} (no filtering). */
	private StopListYYH stopListYYH;

	/**
	 * Frequency of every word seen in the sentences, excluding words that are
	 * in {@link #termList} or are stop words.
	 */
	private Map<String, Integer> wordFreMap = new HashMap<>();

	/**
	 * Number of matches of each term over all processed sentences.
	 */
	private Map<String, Integer> termFreMap = new HashMap<>();

	/**
	 * Total number of sentences (size of {@link #sentenceList}).
	 */
	private int totalDocs;

	/**
	 * Sum of all frequencies stored in {@link #wordFreMap}.
	 */
	private int totalTermFreq;

	/**
	 * Number of processed sentences that contain each term. A term that was
	 * processed but found in no sentence is present with the value 0.
	 */
	private Map<String, Integer> termInDocCountMap = new HashMap<>();

	/** Terms supplied at construction; words equal to one of them are excluded from {@link #wordFreMap}. */
	private List<String> termList;

	/** Sentences supplied at construction; its size becomes {@link #totalDocs}. */
	private List<String> sentenceList;


	/**
	 * Creates a service holding the given collaborators. No counting is done
	 * here; call {@link #execute(List, List)} to populate the statistics.
	 *
	 * @param stopListYYH  stop-word filter used while counting ordinary words
	 * @param termList     terms to exclude from the ordinary word count
	 * @param sentenceList sentences whose number is reported as the total document count
	 */
	public TfIdfService(StopListYYH stopListYYH, List<String> termList, List<String> sentenceList) {
		this.stopListYYH = stopListYYH;
		this.termList = termList;
		this.sentenceList = sentenceList;
	}


	/**
	 * Scans every non-empty sentence and updates all statistics.
	 *
	 * <p>Each sentence is tokenized exactly once for the word-frequency map,
	 * and every non-empty term is then matched against it. {@code null} or
	 * empty terms and sentences are skipped; a {@code null} list is treated
	 * as empty.
	 *
	 * @param terms     terms to count
	 * @param sentences sentences to scan
	 */
	public void execute(List<String> terms, List<String> sentences){
		if(sentences != null){
			for(String sentence : sentences){
				// null must be tested first, otherwise a null element throws before the check
				if(sentence == null || sentence.isEmpty()){
					continue;
				}
				// word counts do not depend on the term, so do this once per sentence
				generateWordFreMap(sentence);
				if(terms == null){
					continue;
				}
				for(String term : terms){
					if(term == null || term.isEmpty()){
						continue;
					}
					generateTermInDocCountMap(term, sentence);
					generateTermFreMap(term, sentence);
				}
			}
		}
		generateTotalTermFreq();
		generateTotalDocs();
	}


	/**
	 * Adds {@code delta} to the counter stored under {@code key}, creating the
	 * entry when it does not exist yet.
	 */
	private static void addCount(Map<String, Integer> counts, String key, int delta){
		Integer current = counts.get(key);
		counts.put(key, current == null ? delta : current + delta);
	}

	/**
	 * Recomputes {@link #totalTermFreq} as the sum of the values currently in
	 * {@link #wordFreMap}. The total is rebuilt from scratch so that repeated
	 * calls to {@code execute} do not count earlier sentences twice.
	 */
	private void generateTotalTermFreq(){
		int sum = 0;
		for(Integer frequency : wordFreMap.values()){
			if(frequency != null){
				sum += frequency;
			}
		}
		totalTermFreq = sum;
	}

	/**
	 * Sets {@link #totalDocs} to the size of {@link #sentenceList} (0 when it is {@code null}).
	 */
	private void generateTotalDocs(){
		// NOTE(review): uses the constructor-supplied list, not the list passed to execute; confirm callers pass the same one
		totalDocs = sentenceList == null ? 0 : sentenceList.size();
	}

	/**
	 * Records whether {@code sentence} contains {@code term}: the term's
	 * counter grows by one when it does, and is merely created (at 0) or left
	 * unchanged when it does not.
	 */
	private void generateTermInDocCountMap(String term, String sentence){
		addCount(termInDocCountMap, term, sentence.contains(term) ? 1 : 0);
	}

	/**
	 * Counts the matches of {@code term} in one sentence and adds the result
	 * to {@link #termFreMap}.
	 *
	 * <p>The term is matched literally. A run of directly adjacent repetitions
	 * of the term counts as a single match.
	 *
	 * @param term     term to look for
	 * @param sentence sentence to search
	 */
	private void generateTermFreMap(String term, String sentence){
		// quote the term so regex metacharacters in it are matched as plain text
		Pattern pattern = Pattern.compile("(" + Pattern.quote(term) + ")+");
		Matcher matcher = pattern.matcher(sentence);
		int count = 0;
		while(matcher.find()){
			count++;
		}
		addCount(termFreMap, term, count);
	}

	/**
	 * Splits {@code sentence} on single spaces and counts every resulting word
	 * in {@link #wordFreMap}, skipping empty tokens, words contained in
	 * {@link #termList}, and words the stop list reports as stop words.
	 *
	 * @param sentence sentence to tokenize
	 */
	private void generateWordFreMap(String sentence){
		for(String rawWord : sentence.split(" ")){
			// trim once and use the same key for lookup and storage
			String word = rawWord.trim();
			if(word.isEmpty()){
				continue;
			}
			if(termList != null && termList.contains(word)){
				continue;
			}
			if(stopListYYH != null && stopListYYH.isStopWord(word)){
				continue;
			}
			addCount(wordFreMap, word, 1);
		}
	}


	/** @return the stop-word filter currently in use */
	public StopListYYH getStopListYYH() {
		return stopListYYH;
	}

	/** @param stopListYYH the stop-word filter to use for subsequent counting */
	public void setStopListYYH(StopListYYH stopListYYH) {
		this.stopListYYH = stopListYYH;
	}

	/** @return the live word-frequency map (not a copy) */
	public Map<String, Integer> getWordFreMap() {
		return wordFreMap;
	}

	/** @param wordFreMap map that replaces the current word-frequency map */
	public void setWordFreMap(Map<String, Integer> wordFreMap) {
		this.wordFreMap = wordFreMap;
	}

	/** @return the live term-frequency map (not a copy) */
	public Map<String, Integer> getTermFreMap() {
		return termFreMap;
	}

	/** @param termFreMap map that replaces the current term-frequency map */
	public void setTermFreMap(Map<String, Integer> termFreMap) {
		this.termFreMap = termFreMap;
	}

	/** @return the total number of sentences as of the last {@code execute} call or setter call */
	public int getTotalDocs() {
		return totalDocs;
	}

	/** @param totalDocs value that overrides the stored sentence total */
	public void setTotalDocs(int totalDocs) {
		this.totalDocs = totalDocs;
	}

	/** @return the sum of all word frequencies as of the last {@code execute} call or setter call */
	public int getTotalTermFreq() {
		return totalTermFreq;
	}

	/** @param totalTermFreq value that overrides the stored word-frequency total */
	public void setTotalTermFreq(int totalTermFreq) {
		this.totalTermFreq = totalTermFreq;
	}

	/** @return the live map of term to number of containing sentences (not a copy) */
	public Map<String, Integer> getTermInDocCountMap() {
		return termInDocCountMap;
	}

	/** @param termInDocCountMap map that replaces the current containing-sentence counts */
	public void setTermInDocCountMap(Map<String, Integer> termInDocCountMap) {
		this.termInDocCountMap = termInDocCountMap;
	}

	/** @return the term list supplied at construction */
	public List<String> getTermList() {
		return termList;
	}

	/** @return the sentence list supplied at construction */
	public List<String> getSentenceList() {
		return sentenceList;
	}

}





